library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 4.1.3
## here() starts at C:/Users/mahri/OneDrive/CodeClan/rshiny_dashboard_project/Work In Progress/Demographics
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggplot2)
# hosp_activity_page <- read_csv(here("data/hospital_activity_page.csv"))
# hosp_activity_by_speciality <- 
#   read_csv(here("data/Hospital Activity by Speciality.csv"))

hosp_activity_and_demographics <- 
  read_csv(here("data/Hospital Activity and Patient Demographics.csv"))
## Rows: 129599 Columns: 19
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (12): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl  (7): _id, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, Len...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
hosp_activity_and_deprivation <- 
  read_csv(here("data/Hospital Activity and Deprivation.csv"))
## Rows: 40894 Columns: 19
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl  (8): _id, SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stay...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 
# aAndE_waiting_times <- 
#   read_excel(here("data/monthly_ae_waitingtimes_A&E attendances and performance data_202201.xlsx"))
covid_ads_HB_deprivation <- 
  read_excel(here("data/Hospitalisations due to Covid 19/Admissions By Health Board and Deprivation_20220302.xlsx"))

# covid_ads_HB_and_speciality <- 
#   read_excel(here("data/Hospitalisations due to Covid 19/Admissions By Health Board and Specialty_20220302.xlsx"))
covid_ads_HB_age_sex <- 
  read_excel(here("data/Hospitalisations due to Covid 19/Admissions By Health Board, Age and Sex_20220302.xlsx"))
covid_ads_HSCP_deprivation <- 
   read_excel(here("data/Hospitalisations due to Covid 19/Admissions By HSCP and Deprivation_20220302.xlsx"))
# covid_ads_HSCP_speciality <- 
#   read_excel(here("data/Hospitalisations due to Covid 19/Admissions By HSCP and Specialty_20220302.xlsx"))
covid_ads_HSCP_age_sex <- 
  read_excel(here("data/Hospitalisations due to Covid 19/Admissions By HSCP, Age and Sex_20220302.xlsx"))

Clean names

hosp_activity_and_demographics <- 
  janitor::clean_names(hosp_activity_and_demographics)
hosp_activity_and_deprivation <- 
  janitor::clean_names(hosp_activity_and_deprivation)
covid_ads_HB_deprivation <- 
  janitor::clean_names(covid_ads_HB_deprivation)
covid_ads_HB_age_sex <- 
  janitor::clean_names(covid_ads_HB_age_sex)
covid_ads_HSCP_deprivation <- 
  janitor::clean_names(covid_ads_HSCP_deprivation)
covid_ads_HSCP_age_sex <- 
  janitor::clean_names(covid_ads_HSCP_age_sex)

Do they match?

names(hosp_activity_and_demographics)
##  [1] "id"                           "quarter"                     
##  [3] "quarter_qf"                   "hb"                          
##  [5] "hbqf"                         "location"                    
##  [7] "location_qf"                  "admission_type"              
##  [9] "admission_type_qf"            "sex"                         
## [11] "age"                          "episodes"                    
## [13] "length_of_episode"            "average_length_of_episode"   
## [15] "average_length_of_episode_qf" "stays"                       
## [17] "length_of_stay"               "average_length_of_stay"      
## [19] "average_length_of_stay_qf"
names(hosp_activity_and_deprivation)
##  [1] "id"                           "quarter"                     
##  [3] "quarter_qf"                   "hb"                          
##  [5] "hbqf"                         "location"                    
##  [7] "location_qf"                  "admission_type"              
##  [9] "admission_type_qf"            "simd"                        
## [11] "simdqf"                       "episodes"                    
## [13] "length_of_episode"            "average_length_of_episode"   
## [15] "average_length_of_episode_qf" "stays"                       
## [17] "length_of_stay"               "average_length_of_stay"      
## [19] "average_length_of_stay_qf"
names(covid_ads_HB_deprivation)
## [1] "week_ending"       "hb"                "hbqf"             
## [4] "simd_quintile"     "admission_type"    "admission_type_qf"
## [7] "number_admissions" "average20182019"   "percent_variation"
names(covid_ads_HB_age_sex)
##  [1] "week_ending"       "hb"                "hbqf"             
##  [4] "age_group"         "age_group_qf"      "sex"              
##  [7] "sex_qf"            "admission_type"    "admission_type_qf"
## [10] "number_admissions" "average20182019"   "percent_variation"
names(covid_ads_HSCP_deprivation)
## [1] "week_ending"       "hscp"              "simd_quintile"    
## [4] "admission_type"    "admission_type_qf" "number_admissions"
## [7] "average20182019"   "percent_variation"
names(covid_ads_HSCP_age_sex)
##  [1] "week_ending"       "hscp"              "age_group"        
##  [4] "age_group_qf"      "sex"               "sex_qf"           
##  [7] "admission_type"    "admission_type_qf" "number_admissions"
## [10] "average20182019"   "percent_variation"
hosp_activity_and_demographics %>% 
  distinct(age)
# 10 year increments i.e. 0-9, 10-19... 80-89, 90 years and over
# No NA values
hosp_activity_and_demographics %>% 
  mutate(age = is.na(age)) %>% 
  filter(age == TRUE)
# 7 General hosp admission types for both demographics and deprivation:
# Elective Inpatients
# Emergency Inpatients,  (THIS IS THE ONLY DEFINITELY ACUTE ONE)
# Transfers
# All Day cases
# All Inpatients
# All inpatients and Day Cases
# Not Specified 
# No NAs.
hosp_activity_and_demographics %>% 
  distinct(admission_type)
hosp_activity_and_deprivation %>% 
  distinct(admission_type)
# 3 COVID ADMISSION TYPES for HB and HSCP - deprivation and age/sex: 
# All - Emergency - Planned
covid_ads_HB_deprivation %>% 
  distinct(admission_type)
covid_ads_HB_age_sex %>% 
  distinct(admission_type)
covid_ads_HSCP_deprivation %>% 
  distinct(admission_type)
covid_ads_HSCP_age_sex %>% 
  distinct(admission_type)  
# tried them all for NA and there are none
# hosp_activity_and_demographics %>% 
#   mutate(admission_type = is.na(admission_type)) %>% 
#   filter(admission_type == TRUE)



General Hospital Admissions

November 14th 2019 - February 22nd 2020

  • This data is by QUARTER (COVID admissions is by week ending from May 2020 to March 2022
  • SEX here is “Male”, “Female”, COVID data also has Male, Female, and All
  • AGE groups are in 10 year increments with (from “0-10 to”90 and over”) COVID data is a lot dirtier
  • These are general hospital admissions - not by health board etc


Sex


# 2 sex = Female, Male 
hosp_activity_and_demographics %>% 
  distinct(sex)
hosp_activity_and_demographics %>% 
  mutate(sex = is.na(sex)) %>% 
  filter(sex == TRUE)
# No NAs


Sex against time and admissions

Filter for acute services:

# might want to make use of this if we're not sure about admission types
# acute_target <- c("Emergency Inpatients", "Not Specified")
# Others = Elective Inpatients, Transfers, All Day cases, All Inpatients, All 
# Inpatients and Day cases, Not Specified

general_admissions_acute_age_sex <- hosp_activity_and_demographics %>% 
  filter(admission_type == "Emergency Inpatients")
general_admissions_acute_age_sex

Now group by and create graphs over time:

gen_admissions_sex_per_quarter <- general_admissions_acute_age_sex %>% 
  group_by(quarter, sex) %>% 
  summarise(total_admissions_per_quarter = n())
## `summarise()` has grouped output by 'quarter'. You can override using the
## `.groups` argument.
gen_admissions_sex_per_quarter %>% 
  ggplot()+
  aes(x = quarter, 
      y = total_admissions_per_quarter, 
      group = sex, colour = sex)+
  geom_line() + 
  labs(x = "Quarter", 
       y = "Total Admissions", 
       title = "Total Emergency Inpatient Admissions",
       subtitle = "November 2019 to February 2022", 
       colour = "Sex") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))


Age

hosp_activity_and_demographics %>% 
  distinct(age) # 10 year increments i.e. 0-9, 10-19... 80-89, 90 years and over
# No NA values 

hosp_activity_and_demographics %>% 
  mutate(age = is.na(age)) %>% 
  filter(age == TRUE) # no NAs
gen_admissions_age_per_quarter <- general_admissions_acute_age_sex %>% 
  # remember this is only looking at Emergency Inpatients but there are 7 groups
  group_by(quarter, age) %>% 
  summarise(total_admissions_per_quarter = n())
## `summarise()` has grouped output by 'quarter'. You can override using the
## `.groups` argument.
# Note that it's 0-9years that are the low values and I'll make this better soon
gen_admissions_age_per_quarter %>% 
  ggplot()+
  aes(x = quarter, 
      y = total_admissions_per_quarter, 
      group = age, colour = age)+
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Admissions", 
       title = "Total Emergency Inpatient Admissions",
       subtitle = "November 2019 to February 2022", 
       colour = "Age Group") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))

gen_admissions_age_per_quarter %>% 
  ggplot()+
  aes(x = quarter, 
      y = total_admissions_per_quarter, 
      group = age)+
  geom_col() + 
  facet_wrap(~age)+
  labs(x = "Yearly Quarter", 
       y = "Total Admissions", 
       title = "JUST MADE THIS TO SEE THE DIFFERENCE - AWARE IT'S MESSY - 
       Total Emergency Inpatient Admissions",
       subtitle = "November 2019 to February 2022") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))


SIMD (Scottish Index of Multiple Deprivation)

  • This is a quintile scale (1 = “Most Deprived”, 5 = “Least Deprived”)
  • The most appropriate SIMD released for each year is used.


# We have the same admission types here as above so I'll do the same - i.e. only
# include the Emergency Inpatients though there are 7 groups
hosp_activity_and_deprivation %>% 
  distinct(admission_type)
general_admissions_acute_deprivation <- hosp_activity_and_deprivation %>% 
  filter(admission_type == "Emergency Inpatients")
general_admissions_acute_deprivation

SIMD Types * note there are 962 NAs: there are codes in the simdqf column as to why * sometimes it’s just a general geographical group

general_admissions_acute_deprivation %>% 
  group_by(simd) %>% 
  summarise(total_for_each_simd = n())
gen_admissions_simd_per_quarter <- general_admissions_acute_deprivation %>% 
  group_by(quarter, simd) %>% 
  drop_na(simd) %>% 
  summarise(total_admission_per_quarter = n())
## `summarise()` has grouped output by 'quarter'. You can override using the
## `.groups` argument.
gen_admissions_simd_per_quarter
gen_admissions_simd_per_quarter %>% 
  ggplot() + 
  aes(x = quarter, 
      y = total_admission_per_quarter, 
      fill = simd) + 
  geom_col(position = "dodge") + #why won' it dodge? Not enough space??
  labs(x = "Yearly Quarter", 
       y = "Total Admissions", 
       title = "Total Emergency Inpatient Admissions by Deprivation Level")+#, 
       #colour = "SIMD Level: (1 = Most Deprived : 5 = Least Deprived")+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))

gen_admissions_simd_per_quarter %>% 
  ggplot() + 
  aes(x = quarter, 
      y = total_admission_per_quarter, 
      fill = simd) + 
  geom_col(position = "dodge") + 
  facet_wrap(~simd)+
  labs(x = "Yearly Quarter", 
       y = "Total Admissions", 
       title = "Total Emergency Inpatient Admissions by Deprivation Level", 
       colour = "SIMD Level: (1 = Most Deprived : 5 = Least Deprived")+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))

# hmmm, that's not a very nice line graph:
gen_admissions_simd_per_quarter %>% 
  ggplot() + 
  aes(x = quarter, 
      y = total_admission_per_quarter, 
      group = simd, 
      colour = simd) + 
  geom_line() +
  labs(x = "Yearly Quarter", 
       y = "Total Admissions", 
       title = "Total Emergency Inpatient Admissions by Deprivation Level")+#, 
       #colour = "SIMD Level: (1 = Most Deprived : 5 = Least Deprived")+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))



## COVID ADMISSIONS

May 21st 2020 - March 2nd, 2022

Sex and gender by healthboard


NOTE sex: “All” = 33951 rows, Female = 4782, Male = 4783


# by healthboard

# admission types are either emergncy or planned - that's good news for us
covid_ads_HB_age_sex %>% 
  distinct(admission_type)
covid_acute_HB_age_sex <- covid_ads_HB_age_sex %>% 
  filter(admission_type == "Emergency")

# this is not neatly divided and there is an age group "41760" hmm
covid_acute_HB_age_sex %>% 
  group_by(age_group) %>% 
  summarise(count_in_age_group = n())


# nope
covid_acute_HB_age_sex %>% 
  group_by(age_group) %>% 
  ggplot()+
  aes(x = number_admissions, 
      y = average20182019, 
      fill = age_group)+
  geom_point()

covid_ads_HB_age_sex %>% 
  group_by(sex) %>% 
  summarise(total_of_each_sex = n())
covid_ads_HB_demographics_sex <- covid_ads_HB_age_sex %>% 
  group_by(week_ending, sex) %>% 
  summarise(total_admissions_for_week_ending = n())
## `summarise()` has grouped output by 'week_ending'. You can override using the
## `.groups` argument.
covid_ads_HB_demographics_sex
covid_ads_HB_demographics_sex %>% 
  ggplot()+
  aes(x = week_ending, 
      y = total_admissions_for_week_ending, 
      group = sex, 
      colour = sex)+
  geom_line()+
  labs(title = "I AM AWARE THIS IS AWFUL")